{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\" 获取PDF表格中的数据 \"\"\"\n",
    "# 导入必要库\n",
    "import pdfplumber\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "pdf = pdfplumber.open(R'data\\2022comdesigngamelist.pdf')\n",
    "first_page = pdf.pages[1]\n",
    "table1 = first_page.extract_table()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "table = []\n",
    "for i in range(len(pdf.pages)):\n",
    "    if(i+1)<len(pdf.pages):\n",
    "      page = pdf.pages[i + 1]\n",
    "      table.extend(page.extract_table())\n",
    "\n",
    "# print(table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.DataFrame(table,columns=('学校','项目名称','大类','参赛学生','指导教师')).to_csv('2022年西北赛区推送国赛名单.csv',)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "西北赛区推送国赛名单表 = pd.DataFrame(table,columns=('学校','项目名称','大类','参赛学生','指导教师'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.index.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = df['学校'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = 西北赛区推送国赛名单表.学校.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['__abs__',\n",
       " '__add__',\n",
       " '__and__',\n",
       " '__bool__',\n",
       " '__ceil__',\n",
       " '__class__',\n",
       " '__delattr__',\n",
       " '__dir__',\n",
       " '__divmod__',\n",
       " '__doc__',\n",
       " '__eq__',\n",
       " '__float__',\n",
       " '__floor__',\n",
       " '__floordiv__',\n",
       " '__format__',\n",
       " '__ge__',\n",
       " '__getattribute__',\n",
       " '__getnewargs__',\n",
       " '__gt__',\n",
       " '__hash__',\n",
       " '__index__',\n",
       " '__init__',\n",
       " '__init_subclass__',\n",
       " '__int__',\n",
       " '__invert__',\n",
       " '__le__',\n",
       " '__lshift__',\n",
       " '__lt__',\n",
       " '__mod__',\n",
       " '__mul__',\n",
       " '__ne__',\n",
       " '__neg__',\n",
       " '__new__',\n",
       " '__or__',\n",
       " '__pos__',\n",
       " '__pow__',\n",
       " '__radd__',\n",
       " '__rand__',\n",
       " '__rdivmod__',\n",
       " '__reduce__',\n",
       " '__reduce_ex__',\n",
       " '__repr__',\n",
       " '__rfloordiv__',\n",
       " '__rlshift__',\n",
       " '__rmod__',\n",
       " '__rmul__',\n",
       " '__ror__',\n",
       " '__round__',\n",
       " '__rpow__',\n",
       " '__rrshift__',\n",
       " '__rshift__',\n",
       " '__rsub__',\n",
       " '__rtruediv__',\n",
       " '__rxor__',\n",
       " '__setattr__',\n",
       " '__sizeof__',\n",
       " '__str__',\n",
       " '__sub__',\n",
       " '__subclasshook__',\n",
       " '__truediv__',\n",
       " '__trunc__',\n",
       " '__xor__',\n",
       " 'as_integer_ratio',\n",
       " 'bit_length',\n",
       " 'conjugate',\n",
       " 'denominator',\n",
       " 'from_bytes',\n",
       " 'imag',\n",
       " 'numerator',\n",
       " 'real',\n",
       " 'to_bytes']"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dir(1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9650cb4e16cdd4a8e8e2d128bf38d875813998db22a3c986335f89e0cb4d7bb2"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
